Importing data

countMatrix <- ReadDataFrameFromTsv(file.name.path="../data/NEW_PFCmatrix.txt")
## ../data/NEW_PFCmatrix.txt read from disk!
# head(countMatrix)

designMatrix <- ReadDataFrameFromTsv(file.name.path="../design/all_samples.tsv.csv")
## ../design/all_samples.tsv.csv read from disk!
# head(designMatrix)

filteredCountsProp <- filterLowCounts(counts.dataframe=countMatrix, 
                                    is.normalized=FALSE,
                                    design.dataframe=designMatrix,
                                    cond.col.name="gcondition",
                                    method.type="Proportion")
## features dimensions before normalization: 52636
## Filtering out low count features...
## 15829 features are to be kept for differential expression analysis with filtering method 3

Plot PCA of log unnormalized data

pc1_2 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(filteredCountsProp), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC1", yPCA="PC2", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="Prop-Un-Norm")
## [1] TRUE
pc2_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(filteredCountsProp), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC2", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="Prop-Un-Norm")
## [1] TRUE
pc1_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(filteredCountsProp), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC1", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="Prop-Un-Norm")
## [1] TRUE
plotly::subplot(pc1_2, pc2_3, pc1_3, nrows=2, margin = 0.1, titleX=TRUE, titleY=TRUE)

Normalizations

Upper Quartile Normalization

normPropCountsUqua <- NormalizeData(data.to.normalize=filteredCountsProp, 
                                    norm.type="uqua", 
                                    design.matrix=designMatrix)

pc1_2 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(normPropCountsUqua), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC1", yPCA="PC2", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="UQUA-Norm")
## [1] TRUE
pc2_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(normPropCountsUqua), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC2", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="UQUA-Norm")
## [1] TRUE
pc1_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(normPropCountsUqua), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC1", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="UQUA-Norm")
## [1] TRUE
plotly::subplot(pc1_2, pc2_3, pc1_3, nrows=2, margin = 0.1, titleX=TRUE, titleY=TRUE)

Negative control genes

Estimating Negative Control Genes to normalize data

#### estimating neg controls
source("../R/edgeRFunctions.R")
# cc <- c("KOSD5 - KOHC5", "KORS2 - KOHC7", "WTSD5 - WTHC5", "WTRS2 - WTHC7")
# 
# rescList1 <- applyEdgeR(counts=normPropCountsUqua, design.matrix=designMatrix,
#                         factors.column="gcondition", 
#                         weight.columns=NULL,
#                         contrasts=cc, useIntercept=FALSE, p.threshold=1,
#                         verbose=TRUE)
# neg.ctrl <- character()
# for(i in 1:length(rescList1)) {
#     ind <- which(rescList1[[i]]$FDR > 0.1)
#     neg.ctrl <- c(neg.ctrl, rownames(rescList1[[i]])[ind] )
# }
# neg.ctrl.est <- unique(neg.ctrl)
neg.ctrl.est.df <- ReadDataFrameFromTsv(file.name.path="../data/negative_controls.tsv", row.names.col=NULL)
neg.ctrl.est <- neg.ctrl.est.df$To
ind.ctrls <- which(rownames(normPropCountsUqua) %in% neg.ctrl.est)
counts.neg.ctrls <- normPropCountsUqua[ind.ctrls,]
pc1_2 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(counts.neg.ctrls), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC1", yPCA="PC2", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="UQUA-Norm")
## [1] TRUE
pc2_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(counts.neg.ctrls), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC2", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="UQUA-Norm")
## [1] TRUE
pc1_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(counts.neg.ctrls), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC1", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="UQUA-Norm")
## [1] TRUE
plotly::subplot(pc1_2, pc2_3, pc1_3, nrows=2, margin = 0.1, titleX=TRUE, titleY=TRUE)

Upper Quartile + RUVs Normalization

library(RUVSeq)
#groups <- makeGroups(designMatrix$classic)[1,,drop=FALSE]
groups <- makeGroups(paste0(designMatrix$genotype, designMatrix$classic))[c(1, 3),]
ruvedSExprData <- RUVs(as.matrix(round(normPropCountsUqua)), cIdx = neg.ctrl.est,
                       scIdx = groups, k = 5)

normExprData <- ruvedSExprData$normalizedCounts

pc1_2 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(normExprData), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC1", yPCA="PC2", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="UQUA+RUV-Norm")
## [1] TRUE
pc2_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(normExprData), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC2", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="UQUA+RUV-Norm")
## [1] TRUE
pc1_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(normExprData), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC1", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="UQUA+RUV-Norm")
## [1] TRUE
plotly::subplot(pc1_2, pc2_3, pc1_3, nrows=2, margin = 0.1, titleX=TRUE, titleY=TRUE)
pal <- RColorBrewer::brewer.pal(9, "Set1")
plotRLE(normExprData, outline=FALSE, col=pal[designMatrix$gcondition])

edgeR DE Analysis

### edgering
source("../R/edgeRFunctions.R")
source("../R/VolcanoPlotFunctions.R")
source("../R/plotUtils.R")
source("../R/MAPlotFunctions.R")
source("../R/pvalHistPlotFunctions.R")
source("../R/plotUtils.R")
desMat <- cbind(designMatrix, ruvedSExprData$W)
colnames(desMat) <- c(colnames(designMatrix), colnames(ruvedSExprData$W))

cc <- c("KOSD5 - KOHC5", "KORS2 - KOHC7", "WTSD5 - WTHC5", "WTRS2 - WTHC7")

rescList1 <- applyEdgeR(counts=normExprData, design.matrix=desMat,
                        factors.column="gcondition", 
                        weight.columns=c("W_1", "W_2", "W_3", "W_4"),
                        contrasts=cc, useIntercept=FALSE, p.threshold=1,
                        verbose=TRUE)

KOSD5 vs KOHC5

PlotHistPvalPlot(de.results=rescList1[[1]], design.matrix=desMat, 
                show.plot.flag=TRUE, plotly.flag=TRUE, 
                prefix.plot=names(rescList1)[1])
## mapping ensembl gene id using biomart
library(biomaRt)

names <- rownames(rescList1[[1]])[order(rownames(rescList1[[1]]))]

mart = useMart('ENSEMBL_MART_ENSEMBL',dataset='mmusculus_gene_ensembl', 
                host="may2012.archive.ensembl.org") ## using mm9
attrs <- c("ensembl_gene_id", "external_gene_id", "entrezgene")
gene.map <- getBM(attributes=attrs, mart=mart, filters="ensembl_gene_id", 
                  values=names)

## gene.map
gm.no.dp <- gene.map#[-which(duplicated(gene.map$ensembl_gene_id)),]
ind.gm <- which(gm.no.dp$ensembl_gene_id %in% names)
gm.no.dpigm <- gm.no.dp[ind.gm,]
gm.no.dpigm.o <- gm.no.dpigm[order(gm.no.dpigm$ensembl_gene_id),]

## names
ind.nm <- which(names %in% gene.map$ensembl_gene_id)
names.gm <- names[ind.nm]
names.gm.o <- names.gm[order(names.gm)]
ndf.map <- cbind.data.frame(gm.no.dpigm.o,
                    names.gm.o)

res.o <- rescList1[[1]][order(as.integer(rownames(rescList1[[1]]))),]

ind.rn <- which(rownames(res.o) %in% ndf.map$names.gm.o)
res.o$map <- NA
# filtCountsProp.o$ensmap <- NA
res.o$map[ind.rn] <- ndf.map$external_gene_id

PlotVolcanoPlot(de.results=res.o, counts.dataframe=normExprData, design.matrix=desMat,
                show.plot.flag=TRUE, plotly.flag=TRUE, save.plot=FALSE, prefix.plot=names(rescList1)[1], threshold=0.05)
PlotMAPlotCounts(de.results=res.o, counts.dataframe=normExprData, design.matrix=desMat,
                 show.plot.flag=TRUE, plotly.flag=TRUE, save.plot=FALSE, prefix.plot=names(rescList1)[1], threshold=0.05)

KORS2 vs KOHC7

PlotHistPvalPlot(de.results=rescList1[[2]], design.matrix=desMat, 
                show.plot.flag=TRUE, plotly.flag=TRUE, 
                prefix.plot=names(rescList1)[2])
## mapping ensembl gene id using biomart
library(biomaRt)

names <- rownames(rescList1[[2]])[order(rownames(rescList1[[2]]))]

mart = useMart('ENSEMBL_MART_ENSEMBL',dataset='mmusculus_gene_ensembl', 
                host="may2012.archive.ensembl.org") ## using mm9
attrs <- c("ensembl_gene_id", "external_gene_id", "entrezgene")
gene.map <- getBM(attributes=attrs, mart=mart, filters="ensembl_gene_id", 
                  values=names)

## gene.map
gm.no.dp <- gene.map[-which(duplicated(gene.map$ensembl_gene_id)),]
ind.gm <- which(gm.no.dp$ensembl_gene_id %in% names)
gm.no.dpigm <- gm.no.dp[ind.gm,]
gm.no.dpigm.o <- gm.no.dpigm[order(gm.no.dpigm$ensembl_gene_id),]

## names
ind.nm <- which(names %in% gene.map$ensembl_gene_id)
names.gm <- names[ind.nm]
names.gm.o <- names.gm[order(names.gm)]
ndf.map <- cbind.data.frame(gm.no.dpigm.o,
                    names.gm.o)

res.o <- rescList1[[2]][order(rownames(rescList1[[2]])),]

ind.rn <- which(rownames(res.o) %in% ndf.map$names.gm.o)
res.o$map <- NA
# filtCountsProp.o$ensmap <- NA
res.o$map[ind.rn] <- ndf.map$external_gene_id

PlotVolcanoPlot(de.results=res.o, counts.dataframe=normExprData, design.matrix=desMat,
                show.plot.flag=TRUE, plotly.flag=TRUE, save.plot=FALSE, prefix.plot=names(rescList1)[2], threshold=0.05)
PlotMAPlotCounts(de.results=res.o, counts.dataframe=normExprData, design.matrix=desMat,
                 show.plot.flag=TRUE, plotly.flag=TRUE, save.plot=FALSE, prefix.plot=names(rescList1)[2], threshold=0.05)

WTSD5 vs WTHC5

PlotHistPvalPlot(de.results=rescList1[[3]], design.matrix=desMat, 
                show.plot.flag=TRUE, plotly.flag=TRUE, 
                prefix.plot=names(rescList1)[3])
## mapping ensembl gene id using biomart
library(biomaRt)


names <- rownames(rescList1[[3]])[order(rownames(rescList1[[3]]))]

mart = useMart('ENSEMBL_MART_ENSEMBL',dataset='mmusculus_gene_ensembl', 
                host="may2012.archive.ensembl.org") ## using mm9
attrs <- c("ensembl_gene_id", "external_gene_id", "entrezgene")
gene.map <- getBM(attributes=attrs, mart=mart, filters="ensembl_gene_id", 
                  values=names)

## gene.map
gm.no.dp <- gene.map[-which(duplicated(gene.map$ensembl_gene_id)),]
ind.gm <- which(gm.no.dp$ensembl_gene_id %in% names)
gm.no.dpigm <- gm.no.dp[ind.gm,]
gm.no.dpigm.o <- gm.no.dpigm[order(gm.no.dpigm$ensembl_gene_id),]

## names
ind.nm <- which(names %in% gene.map$ensembl_gene_id)
names.gm <- names[ind.nm]
names.gm.o <- names.gm[order(names.gm)]
ndf.map <- cbind.data.frame(gm.no.dpigm.o,
                    names.gm.o)

res.o <- rescList1[[3]][order(rownames(rescList1[[3]])),]

ind.rn <- which(rownames(res.o) %in% ndf.map$names.gm.o)
res.o$map <- NA
# filtCountsProp.o$ensmap <- NA
res.o$map[ind.rn] <- ndf.map$external_gene_id

PlotVolcanoPlot(de.results=res.o, counts.dataframe=normExprData, design.matrix=desMat,
                show.plot.flag=TRUE, plotly.flag=TRUE, save.plot=FALSE, prefix.plot=names(rescList1)[3], threshold=0.05)
PlotMAPlotCounts(de.results=res.o, counts.dataframe=normExprData, design.matrix=desMat,
                 show.plot.flag=TRUE, plotly.flag=TRUE, save.plot=FALSE, prefix.plot=names(rescList1)[3], threshold=0.05)

WTRS2 vs WTHC7

PlotHistPvalPlot(de.results=rescList1[[4]], design.matrix=desMat, 
                show.plot.flag=TRUE, plotly.flag=TRUE, 
                prefix.plot=names(rescList1)[4])
## mapping ensembl gene id using biomart
library(biomaRt)

names <- rownames(rescList1[[4]])[order(rownames(rescList1[[4]]))]

mart = useMart('ENSEMBL_MART_ENSEMBL',dataset='mmusculus_gene_ensembl', 
                host="may2012.archive.ensembl.org") ## using mm9
attrs <- c("ensembl_gene_id", "external_gene_id", "entrezgene")
gene.map <- getBM(attributes=attrs, mart=mart, filters="ensembl_gene_id", 
                  values=names)

## gene.map
gm.no.dp <- gene.map#[-which(duplicated(gene.map$ensembl_gene_id)),]
ind.gm <- which(gm.no.dp$ensembl_gene_id %in% names)
gm.no.dpigm <- gm.no.dp[ind.gm,]
gm.no.dpigm.o <- gm.no.dpigm[order(gm.no.dpigm$ensembl_gene_id),]

## names
ind.nm <- which(names %in% gene.map$ensembl_gene_id)
names.gm <- names[ind.nm]
names.gm.o <- names.gm[order(names.gm)]
ndf.map <- cbind.data.frame(gm.no.dpigm.o,
                    names.gm.o)

res.o <- rescList1[[4]][order(rownames(rescList1[[4]])),]

ind.rn <- which(rownames(res.o) %in% ndf.map$names.gm.o)
res.o$map <- NA
# filtCountsProp.o$ensmap <- NA
res.o$map[ind.rn] <- ndf.map$external_gene_id

PlotVolcanoPlot(de.results=res.o, counts.dataframe=normExprData, design.matrix=desMat,
                show.plot.flag=TRUE, plotly.flag=TRUE, save.plot=FALSE, prefix.plot=names(rescList1)[4], threshold=0.05)
PlotMAPlotCounts(de.results=res.o, counts.dataframe=normExprData, design.matrix=desMat,
                 show.plot.flag=TRUE, plotly.flag=TRUE, save.plot=FALSE, prefix.plot=names(rescList1)[4], threshold=0.05)